

#Regression Section

data <- read.csv("~/Desktop/Nunatfinal/final.csv") #Replace with path to finalized dictionary-counted csv
library(betareg) 

#This is the transform for the dependent variable, because there are zeroes in the model and beta regression is for variables *between* 0 and 1. If 
#0 and 1 are present in the data then the DV can be transformed using the formula [(y(n-1) + s) / n] (where y is the DV, n is the total number of observations,
#and s is a constant between 0 and 1; Smithson and Verkulien (2006) state that 0.5 is a "reasonable choice" for s) which is coded below 
#(Mathematical reasoning care of Smithson, Michael and Jay Verkulien (2006). "A Better Lemon Squeezer? Maximum-Likelihood Regression With Beta-Distributed 
#Dependent Variables." Psychological Methods 11 (1): 54-71)

y.transf.betareg <- function(y){
  n.obs <- sum(!is.na(y))
  (y * (n.obs - 1) + 0.5) / n.obs
}

#Beta regression assumes that the dependent variable is beta-distributed, that is in the range (0,1). These measurements are 
#often either heteroskedastic or uncorrectably skewed (or both) naturally because of this making them not well suited for standard OLS linear regression. This
#type of regression produces coefficients that are "interpretable in terms of the mean of the response", meaning that they represent an increase (or decrease)
#above the background mean (Ferrari and Cribari-Neto, 2004). Importantly, the regression model uses a logit link to fit values so that they always fall 
#between 0 and 1; thus the coefficients are interpreted as log odds of the response variable causing a rise in the dependent variable. Odds ratios can then
#be calculated by exponentiating the log-odds/coefficients, such that exp(coef) will give odds ratios for the response variable (For example, an odds ratio
#of 1.144 will represent a 14.4% greater likelihood that the response variable will produce higher values in the dependent variable, all else held constant).
#Use of the betareg package and in-depth discussion of the parameters can be found in Cribari-Neto, F and Zeileis A (2010). "Beta Regression in R." Journal of
#Statistical Software 34 (2): 1-24.

#Positions are conflated into a factor variable:
data$cat <- with(data, factor(1 + a + 2*cp + 3*om + 4*cmwfm, levels = 1:5,
                              labels = c("Others", "AngajukKât", "Chairpersons", "Non-Minister Ordinary Member", "Minister")))
data$cat[which(data$om == 1 & data$cmwfm == 1)] <- "Minister"

#Proportion of local to total with transform applied:
data$prop <- y.transf.betareg(data$proplocal)


#Subsets (by order of the day)
data8 <- subset(data, data$ood == 8)
data9 <- subset(data, data$ood == 9)
data15 <- subset(data, data$ood == 15)
datax <- rbind(data8, data9, data15)

#Beta regression models for the above subsets as well as the aggregate set
betamod <- betareg(prop ~ cat + sex + incumyears, data = data)
betamod1 <- betareg(prop ~ cat + sex + incumyears, data = datax)
betamod2 <- betareg(prop ~ cat + sex + incumyears, data = data8)
betamod3 <- betareg(prop ~ cat + sex + incumyears, data = data9)
betamod4 <- betareg(prop ~ cat + sex + incumyears, data = data15)

#Basic summaries of each model
summary(betamod)
summary(betamod1)
summary(betamod2)
summary(betamod3)
summary(betamod4)

#Marginal Effects for each model - factor variable for position is included here but this can be replaced with the sex or incumbency variables as desired.
library(effects)
a <- effect("cat", betamod)
b <- effect("cat", betamod1)
c <- effect("cat", betamod2)
d <- effect("cat", betamod3)
e <- effect("cat", betamod4)

#The Betareg package includes p-value measurements as well as a goodness-of-fit measurement but if a ChiSq type measurement is desired then
#the linearHypothesis method from the car package can be used, for example:
car::linearHypothesis(betamod4, c("catNon-Minister Ordinary Member = 1","catChairpersons  = 1"), test = "Chisq", white.adjust = "hc1")
